Load the necessary library packages prior to loading the data.

Packages used include :

-tidyr

-dplyr

-plotly

-plotly

-forcats


1. Read the beers data file Beers.csv and inspect the dataframe (head and tail).
library(readr)
Beers <- read_csv("Beers.csv")
## Parsed with column specification:
## cols(
##   Name = col_character(),
##   Beer_ID = col_double(),
##   ABV = col_double(),
##   IBU = col_double(),
##   Brewery_id = col_double(),
##   Style = col_character(),
##   Ounces = col_double()
## )
head(Beers)
## # A tibble: 6 x 7
##   Name           Beer_ID   ABV   IBU Brewery_id Style                Ounces
##   <chr>            <dbl> <dbl> <dbl>      <dbl> <chr>                 <dbl>
## 1 Pub Beer          1436 0.05     NA        409 American Pale Lager      12
## 2 Devil's Cup       2265 0.066    NA        178 American Pale Ale (…     12
## 3 Rise of the P…    2264 0.071    NA        178 American IPA             12
## 4 Sinister          2263 0.09     NA        178 American Double / I…     12
## 5 Sex and Candy     2262 0.075    NA        178 American IPA             12
## 6 Black Exodus      2261 0.077    NA        178 Oatmeal Stout            12
tail(Beers)
## # A tibble: 6 x 7
##   Name               Beer_ID   ABV   IBU Brewery_id Style            Ounces
##   <chr>                <dbl> <dbl> <dbl>      <dbl> <chr>             <dbl>
## 1 Rocky Mountain Oy…    1035 0.075    NA        425 American Stout       12
## 2 Belgorado              928 0.067    45        425 Belgian IPA          12
## 3 Rail Yard Ale          807 0.052    NA        425 American Amber …     12
## 4 B3K Black Lager        620 0.055    NA        425 Schwarzbier          12
## 5 Silverback Pale A…     145 0.055    40        425 American Pale A…     12
## 6 Rail Yard Ale (20…      84 0.052    NA        425 American Amber …     12
2. Read the breweries data file Breweries.csv and inspect the dataframe (head and tail).
library(readr)
Breweries <- read_csv("Breweries.csv")
## Parsed with column specification:
## cols(
##   Brew_ID = col_double(),
##   Name = col_character(),
##   City = col_character(),
##   State = col_character()
## )
head(Breweries)
## # A tibble: 6 x 4
##   Brew_ID Name                      City          State
##     <dbl> <chr>                     <chr>         <chr>
## 1       1 NorthGate Brewing         Minneapolis   MN   
## 2       2 Against the Grain Brewery Louisville    KY   
## 3       3 Jack's Abby Craft Lagers  Framingham    MA   
## 4       4 Mike Hess Brewing Company San Diego     CA   
## 5       5 Fort Point Beer Company   San Francisco CA   
## 6       6 COAST Brewing Company     Charleston    SC
tail(Breweries)
## # A tibble: 6 x 4
##   Brew_ID Name                          City          State
##     <dbl> <chr>                         <chr>         <chr>
## 1     553 Mickey Finn's Brewery         Libertyville  IL   
## 2     554 Covington Brewhouse           Covington     LA   
## 3     555 Dave's Brewfarm               Wilson        WI   
## 4     556 Ukiah Brewing Company         Ukiah         CA   
## 5     557 Butternuts Beer and Ale       Garrattsville NY   
## 6     558 Sleeping Lady Brewing Company Anchorage     AK
3. Inspect the data structures, rename columns that make sense and prepare to join the data sets. Joining could be accomplished with a left, right or full join. Full join is used in this case to preserve data from both tables.
str(Breweries)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 558 obs. of  4 variables:
##  $ Brew_ID: num  1 2 3 4 5 6 7 8 9 10 ...
##  $ Name   : chr  "NorthGate Brewing" "Against the Grain Brewery" "Jack's Abby Craft Lagers" "Mike Hess Brewing Company" ...
##  $ City   : chr  "Minneapolis" "Louisville" "Framingham" "San Diego" ...
##  $ State  : chr  "MN" "KY" "MA" "CA" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Brew_ID = col_double(),
##   ..   Name = col_character(),
##   ..   City = col_character(),
##   ..   State = col_character()
##   .. )
str(Beers)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 2410 obs. of  7 variables:
##  $ Name      : chr  "Pub Beer" "Devil's Cup" "Rise of the Phoenix" "Sinister" ...
##  $ Beer_ID   : num  1436 2265 2264 2263 2262 ...
##  $ ABV       : num  0.05 0.066 0.071 0.09 0.075 0.077 0.045 0.065 0.055 0.086 ...
##  $ IBU       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Brewery_id: num  409 178 178 178 178 178 178 178 178 178 ...
##  $ Style     : chr  "American Pale Lager" "American Pale Ale (APA)" "American IPA" "American Double / Imperial IPA" ...
##  $ Ounces    : num  12 12 12 12 12 12 12 12 12 12 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Name = col_character(),
##   ..   Beer_ID = col_double(),
##   ..   ABV = col_double(),
##   ..   IBU = col_double(),
##   ..   Brewery_id = col_double(),
##   ..   Style = col_character(),
##   ..   Ounces = col_double()
##   .. )
names(Breweries)
## [1] "Brew_ID" "Name"    "City"    "State"
Renamed Brewery_id to match the Brew_ID inthe Breweries dataset.
names(Beers)
## [1] "Name"       "Beer_ID"    "ABV"        "IBU"        "Brewery_id"
## [6] "Style"      "Ounces"
Beers <- rename(Beers, Brew_ID = Brewery_id)
names(Beers)
## [1] "Name"    "Beer_ID" "ABV"     "IBU"     "Brew_ID" "Style"   "Ounces"
Full join method.
fullData <- full_join(Breweries, Beers, by = "Brew_ID")
head(fullData)
## # A tibble: 6 x 10
##   Brew_ID Name.x   City   State Name.y  Beer_ID   ABV   IBU Style    Ounces
##     <dbl> <chr>    <chr>  <chr> <chr>     <dbl> <dbl> <dbl> <chr>     <dbl>
## 1       1 NorthGa… Minne… MN    Get To…    2692 0.045    50 America…     16
## 2       1 NorthGa… Minne… MN    Maggie…    2691 0.049    26 Milk / …     16
## 3       1 NorthGa… Minne… MN    Wall's…    2690 0.048    19 English…     16
## 4       1 NorthGa… Minne… MN    Pumpion    2689 0.06     38 Pumpkin…     16
## 5       1 NorthGa… Minne… MN    Strong…    2688 0.06     25 America…     16
## 6       1 NorthGa… Minne… MN    Parape…    2687 0.056    47 Extra S…     16
Another column renaming is needed to correct the data names after the join.
fullData <- rename(fullData, BreweryName = Name.x, BeerName = Name.y)
names(fullData)
##  [1] "Brew_ID"     "BreweryName" "City"        "State"       "BeerName"   
##  [6] "Beer_ID"     "ABV"         "IBU"         "Style"       "Ounces"
fullData %>%
  group_by(State) %>%
  summarise(count = n())
## # A tibble: 51 x 2
##    State count
##    <chr> <int>
##  1 AK       25
##  2 AL       10
##  3 AR        5
##  4 AZ       47
##  5 CA      183
##  6 CO      265
##  7 CT       27
##  8 DC        8
##  9 DE        2
## 10 FL       58
## # … with 41 more rows
Exploring the number of breweries in each state, the Breweries.csv data is used. Grouped the Breweries data table by states.
BrewStatesC <- Breweries%>%
  group_by(State)%>%
  summarise(count = n())
head(BrewStatesC)
## # A tibble: 6 x 2
##   State count
##   <chr> <int>
## 1 AK        7
## 2 AL        3
## 3 AR        2
## 4 AZ       11
## 5 CA       39
## 6 CO       47
state.freq <- table(Breweries$State)
barplot(state.freq[order(state.freq)], 
        horiz = T,
        border = NA,
        xlim = c(0, 100),
        main = "Number of Breweries per State",
        xlab = "Number of Brewiews",
        ylab = "States")

#
# Counting the number of brewweries in each state:

head(state.freq)
## 
## AK AL AR AZ CA CO 
##  7  3  2 11 39 47
sDF<-data.frame(state.freq) 
sDF<- rename(sDF, State = Var1, Brew_Count=Freq)


Plot the number of breweries in each state.
p1 <- ggplot(Breweries, aes(x = fct_rev(fct_infreq(State)), fill = fct_infreq(State))) + geom_bar() + coord_flip() + theme(legend.position = "top")

ggplotly(p1)  
Plot the number of beer brands from each state.
p2 <- ggplot(fullData, aes(x = fct_rev(fct_infreq(State)), fill = fct_rev(fct_infreq(State)))) + geom_bar() + coord_flip() + theme(legend.position = "top")

ggplotly(p2)
Did we have any empty rows in our data tables? Let’s find out here:
naB_Id<-sum(is.na(fullData$Brew_ID))
naB_Nm<-sum(is.na(fullData$BreweryName))
naB_C<-sum(is.na(fullData$City))
naB_S<-sum(is.na(fullData$State))
naBeer_Nm<-sum(is.na(fullData$BeerName))
naBeer_Id<-sum(is.na(fullData$Beer_ID))
naABV<-sum(is.na(fullData$ABV))
naIBU<-sum(is.na(fullData$IBU))
naStyle<-sum(is.na(fullData$Style))
naOunces<-sum(is.na(fullData$Ounces))


naResults = paste(naB_Id, naB_Nm, naB_C, naB_S, naBeer_Nm, naBeer_Id, naABV, naIBU, naStyle, naOunces, sep = ", ")

print(naResults)
## [1] "0, 0, 0, 0, 0, 0, 62, 1005, 5, 0"